from collections import defaultdict
import time
import random
import numpy as np


def test(train_file: str, test_file: str):
    # Build word/tag vocabularies from a "<tag> ||| <word word ...>" corpus and
    # load the train and dev sets as lists of ([word ids], tag id) pairs.
    # NOTE(review): despite the name, this is data-loading setup, not a unit
    # test; the definition appears to continue past this chunk -- confirm.
    w2i = defaultdict(lambda: len(w2i))  # each new word gets the next free id
    t2i = defaultdict(lambda: len(t2i))  # each new tag gets the next free id
    UNK = w2i["<unk>"]  # id 0 is reserved for unknown words

    def read_dataset(filename):
      # Each line has the form "<tag> ||| <space-separated words>".
      # The whole line is lowercased before mapping to ids; unseen words/tags
      # receive fresh ids via the defaultdict factories above.
      with open(filename, "r") as f:
        for line in f:
          tag, words = line.lower().strip().split(" ||| ")
          yield ([w2i[x] for x in words.split(" ")], t2i[tag])

    # Read in the data
    train = list(read_dataset(train_file))
    # Freeze the vocabulary: from here on, words not seen in training map to UNK.
    # NOTE(review): a defaultdict lookup still *inserts* an entry for each
    # unseen dev word (word -> UNK), so len(w2i) below can exceed the true
    # training-vocab size. Harmless as an embedding-size upper bound, but the
    # index range is still [0, len-of-train-vocab) -- verify downstream use.
    w2i = defaultdict(lambda: UNK, w2i)
    dev = list(read_dataset(test_file))
    nwords = len(w2i)  # vocab size (see NOTE above: may overcount)
    ntags = len(t2i)   # number of distinct tags seen in train+dev